# Replication file for "Value Shift: Immigration Attitudes and the Sociocultural Divide" by Caroline Marie Lancaster. Please contact cml93@live.unc.edu with any questions. 

# This file compiles the EVS analysis dataset.

# 3 August 2020

library(readstata13)
library(dplyr)
library(magrittr)
library(ggplot2)
library(mirt)
# Read the raw EVS 2017 Stata file from the working directory.
EVS <- read.dta13('evs2017.dta')

# Keep only the variables needed for the analysis. The order of this vector
# matters: later steps address columns of EVS2 by position.
keep_vars <- c('country',
               'v72', 'v72_DE', 'v73', 'v73_DE', 'v74', 'v74_DE', 'v75', 'v75_DE',
               'v76', 'v76_DE', 'v77', 'v77_DE', 'v78', 'v78_DE',
               'v80', 'v81', 'v82', 'v83',
               'v141', 'v153', 'v154', 'v155',
               'v184', 'v185', 'v186', 'v187', 'v188',
               'v102', 'v225', 'v226', 'v234', 'v242', 'v243_edulvlb_1',
               'v244', 'v261', 'v54', 'pweight')
EVS2 <- EVS[keep_vars]

# Restrict the sample to the West European countries used in the analysis.
west_europe <- c('Austria', 'Denmark', 'Finland', 'France', 'Netherlands',
                 'Norway', 'Sweden', 'Switzerland', 'Germany', 'Italy',
                 'Great Britain')
EVS2 <- subset(EVS2, country %in% west_europe)

# Recode the response labels of the four-point agree/disagree items held in
# columns 2-15 (v72-v78 and their "_DE" counterparts) into the codes 1-4,
# stored as text. Substitution order matters: the "... strongly" labels are
# replaced before the shorter labels they contain.
likert4_labels <- c('disagree strongly', 'disagree', 'agree strongly', 'agree')
likert4_codes <- c(1, 2, 4, 3)

for (col in 2:15) {
  answers <- EVS2[, col]
  # the additional middle category becomes missing
  answers <- gsub('neither agree nor disagree (additional category)', NA, answers, fixed = TRUE)
  for (k in seq_along(likert4_labels)) {
    answers <- gsub(likert4_labels[k], likert4_codes[k], answers)
  }
  # every response that did not end up containing a digit is set to NA
  EVS2[, col] <- ifelse(grepl('[0-9]', answers), answers, NA)
}
#some Germans got a version of these questions with an additional answer choice,
#stored in separate "_DE" columns. This combines each pair of columns.
#
# The codes are converted to numbers and merged with pmax(), which takes the
# larger non-missing value of each pair and returns NA when both are missing.
# This avoids calling max() on empty character sets (one warning per all-NA
# row) and then comparing text against -Inf, which depends on the locale's
# collation order. The merged columns are numeric; the later conversion of
# character columns to numeric leaves them unchanged.
likert4_items <- c('v72', 'v73', 'v74', 'v75', 'v76', 'v77', 'v78')
likert4_de_items <- paste0(likert4_items, '_DE')

for (k in seq_along(likert4_items)) {
  main_code <- as.numeric(as.character(EVS2[[likert4_items[k]]]))
  de_code <- as.numeric(as.character(EVS2[[likert4_de_items[k]]]))
  EVS2[[likert4_items[k]]] <- pmax(main_code, de_code, na.rm = TRUE)
}

#removing the original German columns (positions 3, 5, 7, 9, 11, 13, 15)
EVS2 <- EVS2[, !(names(EVS2) %in% likert4_de_items)]

# Recode the five-point agree/disagree items in columns 10-12 (v81, v82, v83
# once the German columns have been dropped) into the codes 1-5, stored as
# text. Substitution order matters: longer labels are replaced before the
# shorter labels they contain.
likert5_labels <- c('disagree strongly', 'neither agree nor disagree',
                    'disagree', 'agree strongly', 'agree')
likert5_codes <- c(1, 3, 2, 5, 4)

for (col in 10:12) {
  answers <- EVS2[, col]
  for (k in seq_along(likert5_labels)) {
    answers <- gsub(likert5_labels[k], likert5_codes[k], answers)
  }
  # every response that did not end up containing a digit is set to NA
  EVS2[, col] <- ifelse(grepl('[0-9]', answers), answers, NA)
}

# Reverse the direction of v82 (1 <-> 5, 2 <-> 4, 3 unchanged).
EVS2$v82 <- car::recode(EVS2$v82, "1 = 5; 2 = 4; 3 = 3; 4 = 2; 5 = 1")

# Columns 13-16 (v141, v153, v154, v155): codes of zero or below become NA.
justify_cols <- 13:16
EVS2[justify_cols] <- lapply(EVS2[justify_cols], function(score) {
  ifelse(score <= 0, NA, score)
})

# Flip each scale with abs(x - 10), so that 10 becomes 0 and 1 becomes 9.
for (item in c('v141', 'v153', 'v154', 'v155')) {
  EVS2[[item]] <- abs(EVS2[[item]] - 10)
}

# Recode the response labels of v80 (agree/disagree) and v184 (good/bad) into
# the codes 1-5; every other response becomes NA.
#
# The recoded vector is converted to character before ifelse(): when the input
# is a factor, car::recode() returns a factor, and ifelse() on a factor yields
# its internal level codes rather than the recoded values. Those codes only
# coincide with 1-5 if all five values occur and sort ahead of the remaining
# labels. The result is text here; the later conversion of character columns
# to numeric turns it into numbers.
v80_codes <- as.character(car::recode(EVS2[, 'v80'], "'disagree strongly' = 1;'disagree' = 2;'neither agree nor disagree' = 3; 'agree' = 4; 'agree strongly' = 5"))
EVS2$v80 <- ifelse(grepl('[0-9]', v80_codes), v80_codes, NA)

v184_codes <- as.character(car::recode(EVS2[, 'v184'], "'very good' = 1;'quite good' = 2;'neither good, nor bad' = 3; 'quite bad' = 4; 'very bad' = 5"))
EVS2$v184 <- ifelse(grepl('[0-9]', v184_codes), v184_codes, NA)

# Columns 18-22 (v185, v186, v187, v188, v102): codes of zero or below
# become NA.
scale_cols <- 18:22
EVS2[scale_cols] <- lapply(EVS2[scale_cols], function(score) {
  ifelse(score <= 0, NA, score)
})

# Flip v185-v187 with abs(x - 10), so that 10 becomes 0 and 1 becomes 9.
for (item in c('v185', 'v186', 'v187')) {
  EVS2[[item]] <- abs(EVS2[[item]] - 10)
}

# Collapse the flipped 0-9 scales into five categories:
#   0 -> 0, 1-2 -> 1, 3-5 -> 2, 6-7 -> 3, 8-9 -> 4
# The bins are applied in ascending order, so a value that has already been
# recoded never falls into a bin that is processed later.
flipped_items <- c('v141', 'v153', 'v154', 'v155', 'v185', 'v186', 'v187')
flipped_lower <- c(0, 1, 3, 6, 8)
flipped_upper <- c(0, 2, 5, 7, 9)
flipped_code <- c(0, 1, 2, 3, 4)

for (item in flipped_items) {
  score <- EVS2[[item]]
  for (k in seq_along(flipped_code)) {
    score[score >= flipped_lower[k] & score <= flipped_upper[k]] <- flipped_code[k]
  }
  EVS2[[item]] <- score
}

# v188 keeps its original 1-10 direction and is collapsed into:
#   1 -> 1, 2-3 -> 2, 4-6 -> 3, 7-8 -> 4, 9-10 -> 5
v188_lower <- c(1, 2, 4, 7, 9)
v188_upper <- c(1, 3, 6, 8, 10)
v188_code <- c(1, 2, 3, 4, 5)

v188_score <- EVS2$v188
for (k in seq_along(v188_code)) {
  v188_score[v188_score >= v188_lower[k] & v188_score <= v188_upper[k]] <- v188_code[k]
}
EVS2$v188 <- v188_score

# v226 is used as a year: values that are not above 1000 are set to NA.
birth_year <- ifelse(EVS2$v226 > 1000, EVS2$v226, NA)
EVS2$v226 <- birth_year

# Assign each respondent to one of five cohorts:
#   1: up to 1949, 2: 1950-1959, 3: 1960-1969, 4: 1970-1979, 5: 1980 onwards
# Respondents matching none of the ranges (including missing years) stay NA.
cohort <- rep(NA, nrow(EVS2))
cohort[which(birth_year <= 1949)] <- 1
cohort[which(birth_year >= 1950 & birth_year <= 1959)] <- 2
cohort[which(birth_year >= 1960 & birth_year <= 1969)] <- 3
cohort[which(birth_year >= 1970 & birth_year <= 1979)] <- 4
cohort[which(birth_year >= 1980)] <- 5
EVS2$cohort <- cohort

# Drop respondents without a cohort.
EVS2 <- EVS2[!is.na(EVS2$cohort), , drop = FALSE]

# Turn every column that is still stored as text into a numeric column.
EVS2 <- mutate_if(EVS2, is.character, as.numeric)

#removing homosexuality (columns 11 and 14, i.e. v82 and v153 at this point)
homosexuality_cols <- c(11, 14)
EVS2 <- EVS2[, -homosexuality_cols]

# v225: keep 'male'/'female'; non-response labels become NA.
EVS2$v225 <- car::recode(EVS2$v225, "'male' = 'male';'female' = 'female';'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA")

# v234: collapse into 'married', 'previously' and 'never'.
EVS2$v234 <- car::recode(EVS2$v234, "'married' = 'married';'registered partnership' = 'married';'widowed' = 'previously'; 'divorced' = 'previously'; 'separated' = 'previously'; 'never married and never registered partnership' = 'never'; 'multiple answers Mail' = NA; 'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA")

# v243_edulvlb_1: collapse into five education groups.
EVS2$v243_edulvlb_1 <- car::recode(EVS2$v243_edulvlb_1, "'Less than primary' = 'Less than HS';'Primary' = 'Less than HS';'Lower secondary' = 'Less than HS'; 'Upper secondary' = 'High school'; 'Post-secondary non tertiary' = 'Vocational'; 'Short-cycle tertiary' = 'Vocational'; 'Bachelor or equivalent' = 'College'; 'Master or equivalent' = 'Advanced'; 'Doctoral or equivalent' = 'Advanced'; 'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA; 'other' =NA")

# v244: collapse into six employment-status groups.
EVS2$v244 <- car::recode(EVS2$v244, "'30h a week or more' = 'Employed';'less then 30h a week' = 'Employed';'self employed' = 'Employed'; 'military service' = 'Employed'; 'retired/pensioned' = 'Pensioner'; 'homemaker not otherwise employed' = 'Homemaker'; 'student' = 'In school'; 'unemployed' = 'Unemployed'; 'disabled' = 'Other';'other' = 'Other'; 'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA; 'multiple answers Mail' = NA")

# v261: non-response labels become NA, then the column is made numeric.
# NOTE(review): if v261 is a factor at this point, as.numeric() returns its
# level codes rather than the labelled values -- confirm against the data.
EVS2$v261 <- as.numeric(car::recode(EVS2$v261, "'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA; 'multiple answers Mail' = NA"))

# v54: frequency labels mapped to 1 (never) through 7 (more than once a week).
EVS2$v54 <- car::recode(EVS2$v54, "'never, practically never' = 1;'less often' = 2;'once a year' = 3; 'only on specific holy days' = 4; 'once a month' = 5; 'once a week' = 6; 'more than once week' = 7; 'na (survey break-off)' = NA; 'no answer' = NA; 'dont know' = NA; 'multiple answers Mail' = NA")

#######
# Multiple imputation of the missing values in EVS2 with mice.
library(mice)
#initializes imputation (maxit = 0 runs no iterations; it only returns the default set-up)
ini <- mice(EVS2, maxit = 0)
#this shows the imputation method for each variable
meth <- ini$method
meth
#all variables are to be imputed with the "pmm" method. This line changes the method of the following variables (variables without a method have no missingness)
meth[c('v225', 'v234', 'v243_edulvlb_1', 'v244', 'v54')] <- "pmm"

pred <- ini$predictorMatrix
pred
#excluding age and population weight from contributing to the imputation of other variables
pred[, c('v242','pweight')] <- 0
# NOTE(review): no seed is passed to mice() here, so the imputed values will
# differ from run to run unless a seed is set elsewhere.
imp <- mice(EVS2, method = meth, predictorMatrix = pred, printFlag = TRUE, nnet.MaxNWts = 5000)

#you may wish to save the imputation: saveRDS(imp, 'impEVS2.RDS')

#this line turns the imputation into a useable dataset
# (the imputation object created above is `imp`; the previous reference to
# `EVSimp` pointed at an object this script never creates)
EVS2 <- mice::complete(imp)

# Gender index: columns 2-8 and 10-14 of the completed data (v72-v78, v81,
# v83, v141, v154, v155 given the column order set up earlier in the file).
gender_items <- c(2:8, 10:14)

#checking cronbach's alpha for gender
psych::alpha(EVS2[gender_items])

#making gender variable: factor scores from a one-factor graded model
mirt1 <- mirt(data = EVS2[gender_items], model = 1, itemtype = "graded",
              SE = TRUE, verbose = TRUE, na.rm = TRUE)
EVS2$gen <- as.numeric(fscores(mirt1))

# Immigration index: columns 9 and 15-19 (v80 and v184-v188 given the column
# order set up earlier in the file).
immigration_items <- c(9, 15:19)

#checking cronbach's alpha for immigration
psych::alpha(EVS2[immigration_items], check.keys = TRUE)

#making immigration variable: factor scores from a one-factor graded model
mirt2 <- mirt(data = EVS2[immigration_items], model = 1, itemtype = "graded",
              SE = TRUE, verbose = TRUE, na.rm = TRUE)
EVS2$im <- as.numeric(fscores(mirt2))

#write data to csv file for further analysis in Stata
write.csv(EVS2, 'EVS2.csv')